Materials for this class are adapted from the Johns Hopkins Coursera class on EDA by Prof. Roger Peng
Drawbacks:
# Base graphics: scatterplot of stopping distance against speed for the
# built-in `cars` data set.
library(datasets)
data(cars)
plot(dist ~ speed, data = cars)
Drawbacks:
# Lattice graphics: life expectancy against income, one panel per region.
library(lattice)

# Combine the state.x77 matrix with the region factor into one data frame.
state <- data.frame(state.x77, region = state.region)

# `| region` conditions on region; layout = c(4, 1) requests 4 columns, 1 row.
xyplot(
  Life.Exp ~ Income | region,
  data = state,
  layout = c(4, 1)
)
## ggplot2
# The slide heading had been fused onto the library() call, producing a call
# to a non-existent `ggplot2library()`; it is separated out here.
library(ggplot2)
data(mpg)

# Quick scatterplot of highway mileage against engine displacement.
# NOTE(review): qplot() is reported as deprecated in recent ggplot2 releases;
# it is kept because the slide demonstrates it - confirm against the
# installed version.
qplot(displ, hwy, data = mpg)

# The same scatterplot built up layer by layer, with explicit axis titles.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  labs(x = "Displacement", y = "Highway mileage")
# Histograms and kernel density plots with base graphics.
library(datasets)

# Histograms with default settings.
hist(AirPassengers)
hist(mtcars$disp)

# Ask for many more breaks and fill the bars.
hist(mtcars$disp, breaks = 100, col = "Green")

## density instead of frequency
hist(mtcars$disp, breaks = 100, col = "Green", freq = FALSE)

## density plot
d <- density(mtcars$mpg)
plot(d) # plots the results

## filled density plot
d <- density(mtcars$mpg)
plot(d, main = "Kernel Density of Miles Per Gallon")
polygon(d, col = "red", border = "blue") # fill the area under the curve
Many base plotting functions share a set of parameters. Here are a few key ones:
- pch: the plotting symbol (default is open circle)
- lty: the line type (default is solid line), can be dashed, dotted, etc.
- lwd: the line width, specified as an integer multiple
- col: the plotting color, specified as a number, string, or hex code; the colors() function gives you a vector of colors by name
- xlab: character string for the x-axis label
- ylab: character string for the y-axis label
## population vs income
# Scatterplot of the first two columns of state.x77, with no axis labels given.
plot(state.x77[, 1], state.x77[, 2])

#### adding x and y labels - xlab and ylab
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income"
)

#### adding color - col
# Two colour codes are supplied; they are recycled across the points.
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6)
)

#### pch
## changing type of point using pch
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), pch = 20
)

## different point types for different variables
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), pch = c(3, 20)
)

#### cex
## controlling size of symbols using cex
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), pch = c(3, 20), cex = 0.8
)
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), pch = c(3, 20), cex = 1.2
)
## line plots
# type = "l" joins the observations with line segments in data order.
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), type = "l"
)

## points and lines
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), type = "b"
)

## line type
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), type = "b", lty = 2
)

## different line type
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), type = "b", lty = 4
)

## line width
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6), type = "b", lty = 4, lwd = 2
)
## abline
# Add a horizontal and a vertical reference line to an existing scatterplot.
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6)
)
abline(h = 4000, col = "red")
abline(v = 7000, col = "blue", lty = 3, lwd = 4)

# Redraw the scatterplot and overlay the least-squares line of column 2 on
# column 1; abline() takes the intercept and slope from the fitted model.
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6)
)
model <- lm(state.x77[, 2] ~ state.x77[, 1])
abline(model, lwd = 2, lty = 3)
## Example of labeling points
# Plot mileage against weight, then write each car's row name next to its
# point (pos = 4 places the label to the right of the point).
with(mtcars, {
  plot(
    wt, mpg,
    main = "Mileage vs. Car Weight",
    xlab = "Weight", ylab = "Mileage",
    pch = 18, col = "blue"
  )
  text(wt, mpg, row.names(mtcars), cex = 0.6, pos = 4, col = "red")
})
## type= values
# Draws the same five points once for each value of the `type` argument,
# one panel per value, on a single 2 x 4 page.
#
# The slide heading had been fused onto the first statement, so `x` was never
# defined and `y <- x` failed; the heading is now a comment of its own.
x <- 1:5
y <- x # create some data

# par() returns the previous values of the settings it changes. Keep them so
# the symbol, colour and page layout do not leak into later plots.
old_par <- par(pch = 22, col = "red", mfrow = c(2, 4))

opts <- c("p", "l", "o", "b", "c", "s", "S", "h")
for (i in seq_along(opts)) {
  heading <- paste("type=", opts[i])
  plot(x, y, type = "n", main = heading) # empty frame with the panel title
  lines(x, y, type = opts[i])            # draw the data in the requested style
}

par(old_par) # back to the settings in force before this demo
References:
# Boxplots with base graphics.
library(datasets)

# One box per column of state.x77, first on the raw values and then after
# scale() has been applied to the matrix.
boxplot(state.x77)
boxplot(scale(state.x77))

## population
boxplot(state.x77[, 1], ylab = "Population")
title("Boxplot of State Populations")

# Boxplot of MPG by Car Cylinders
boxplot(
  mpg ~ cyl,
  data = mtcars,
  main = "Car Milage Data",
  xlab = "Number of Cylinders",
  ylab = "Miles Per Gallon"
)
# Two plots per page, first stacked (2 rows x 1 column) and then side by
# side (1 row x 2 columns).
# par() returns the previous value of the setting it changes; it is saved so
# the single-panel layout can be restored once the demo is finished.

## Row-wise
old_par <- par(mfrow = c(2, 1))
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6)
)
plot(state.x77[, 1], ylab = "Population")

## Column-wise
par(mfcol = c(1, 2))
plot(
  state.x77[, 1], state.x77[, 2],
  xlab = "Population", ylab = "Income",
  col = c(3, 6)
)
plot(state.x77[, 1], ylab = "Population")

par(old_par) # restore the layout that was active before this demo
## matplot
# Print the JohnsonJohnson series. The slide heading had been fused onto
# this statement, producing the undefined symbol `matplotJohnsonJohnson`.
JohnsonJohnson
## Qtr1 Qtr2 Qtr3 Qtr4
## 1960 0.71 0.63 0.85 0.44
## 1961 0.61 0.69 0.92 0.55
## 1962 0.72 0.77 0.92 0.60
## 1963 0.83 0.80 1.00 0.77
## 1964 0.92 1.00 1.24 1.00
## 1965 1.16 1.30 1.45 1.25
## 1966 1.26 1.38 1.86 1.56
## 1967 1.53 1.59 1.83 1.86
## 1968 1.53 2.07 2.34 2.25
## 1969 2.16 2.43 2.70 2.25
## 1970 2.79 3.42 3.69 3.60
## 1971 3.60 4.32 4.32 4.05
## 1972 4.86 5.04 5.04 4.41
## 1973 5.58 5.85 6.57 5.31
## 1974 6.03 6.39 6.93 5.85
## 1975 6.93 7.74 7.83 6.12
## 1976 7.74 8.91 8.28 6.84
## 1977 9.54 10.26 9.54 8.73
## 1978 11.88 12.06 12.15 8.91
## 1979 14.04 12.96 14.85 9.99
## 1980 16.20 14.67 16.02 11.61
# The series is stored as a time-series object rather than a matrix.
class(JohnsonJohnson)
## [1] "ts"
# Lay the values out four per row, filling row by row, so that each row holds
# one year and each column one quarter.
m <- matrix(JohnsonJohnson, ncol = 4, byrow = TRUE)
m
## [,1] [,2] [,3] [,4]
## [1,] 0.71 0.63 0.85 0.44
## [2,] 0.61 0.69 0.92 0.55
## [3,] 0.72 0.77 0.92 0.60
## [4,] 0.83 0.80 1.00 0.77
## [5,] 0.92 1.00 1.24 1.00
## [6,] 1.16 1.30 1.45 1.25
## [7,] 1.26 1.38 1.86 1.56
## [8,] 1.53 1.59 1.83 1.86
## [9,] 1.53 2.07 2.34 2.25
## [10,] 2.16 2.43 2.70 2.25
## [11,] 2.79 3.42 3.69 3.60
## [12,] 3.60 4.32 4.32 4.05
## [13,] 4.86 5.04 5.04 4.41
## [14,] 5.58 5.85 6.57 5.31
## [15,] 6.03 6.39 6.93 5.85
## [16,] 6.93 7.74 7.83 6.12
## [17,] 7.74 8.91 8.28 6.84
## [18,] 9.54 10.26 9.54 8.73
## [19,] 11.88 12.06 12.15 8.91
## [20,] 14.04 12.96 14.85 9.99
## [21,] 16.20 14.67 16.02 11.61
# Draw one line per column of m (one per quarter) against the row index.
# The call appeared twice in a row and drew the same figure twice; one
# call is enough.
matplot(m, type = "l")
## Q-Q Plots
# The slide heading had been fused onto the first comment line, which made
# the line unparseable; it is now a comment of its own.

# Normal Q-Q plot of a standard-normal sample, with its reference line.
x1 <- rnorm(100)
qqnorm(x1)
qqline(x1)

# Comparing 2 distributions
# Two panels side by side; the previous layout is saved and restored below.
old_par <- par(mfrow = c(1, 2))
x <- rt(100, df = 3)

# normal fit
qqnorm(x)
qqline(x)

# t(3Df) fit
# Compare the sample against a larger draw from the same t distribution.
qqplot(
  rt(1000, df = 3), x,
  main = "t(3) Q-Q Plot",
  ylab = "Sample Quantiles"
)
abline(0, 1) # line through the origin with slope 1

par(old_par) # restore the layout that was active before this demo
Interpreting QQ Plots: http://stats.stackexchange.com/a/101290/21450